#clear environment 
rm(list = ls())

library(readr)
library(readxl)
library(reshape2)
library(dplyr)
library(ggplot2)
library(tidyr)
library(abind)
library(zoo)
library(stringr)
library(tidylog)
library(stargazer)

# set directories
source("./Code/set_directories.R")

# Read in ACS Data
ACSdata <- read_csv(file.path(DATA.PROCESSED,"ACS2018_tract_raceincomes_noNAs.csv"), 
                    col_types = cols(.default = "d", GEOID = "c", GeoLabel = "c"))
ACSdata <- ACSdata[,-1]

#Merge ACS Data with Census Region Names
census.regions <- read_csv(file.path(DATA.PATH,"regiondivision.csv"))
names(census.regions)[1] <- "stateFIPS"
ACSdata$stateFIPS <- substr(ACSdata$GEOID,start = 1,stop = 2)
ACSdata <- left_join(ACSdata,census.regions,by = "stateFIPS")

# Zips and Tracts
tracts <- read.csv(file.path(DATA.PATH,"zcta_tract_rel_10.txt"), sep=",", header = TRUE, colClasses=c("ZCTA5"="character","STATE"="character","COUNTY"="character","TRACT"="character"))
tracts$CountyFIPS <- paste(tracts$STATE,tracts$COUNTY,sep="")

# County FIPS
countyFIPS <- read.csv(file.path(DATA.PATH,"countyFIPS.txt"))
countyFIPS <- array(unlist(countyFIPS),dim=c(3,3233))
countyFIPS <- as.data.frame(t(as.matrix(countyFIPS)))
countyFIPS <- countyFIPS[-1,]
names(countyFIPS)[1] <- "CountyFIPS"
names(countyFIPS)[2] <- "CountyName"
names(countyFIPS)[3] <- "PostalCode"

#countyFIPS <- countyFIPS[,-3]
countyFIPS$CountyFIPS <- as.character(countyFIPS$CountyFIPS)

# Merge CountyFIPS and Tracts
tracts <- left_join(x=tracts,y=countyFIPS,by="CountyFIPS")
tracts <- tracts %>% arrange(ZCTA5,desc(POPPT)) #sort by zip and then population (descending), so tract with greatest population within each zip is listed first

zips.fips.tracts <- subset(tracts, select = c("ZCTA5","GEOID","CountyFIPS","CountyName","PostalCode"))

#Collapse so that zip is only matched with one FIPS
collapsed.zips.fips.tracts <-  zips.fips.tracts[ !duplicated(zips.fips.tracts$ZCTA5), ]
names(collapsed.zips.fips.tracts)[1] <- "region"

# Load environmental benefits/damages data
env.benefits.created <- readRDS(file.path(DATA.PROCESSED,"total_damages_zips.rds"))
#env.benefits.received <- readRDS(file.path(DATA.PROCESSED,"total_damages_fips.rds"))

env.benefits.created <- left_join(env.benefits.created,collapsed.zips.fips.tracts,by="region")
####NEED TO DEAL WITH THE zips that weren't matched to counties**************
env.benefits.created.na <- subset(env.benefits.created,is.na(GEOID)==1)
env.benefits.created.na <- env.benefits.created.na[,c(1,2)]
env.benefits.created <- subset(env.benefits.created,is.na(GEOID)==0)

#now let's match the zips that weren't matched to counties to the closest zips in the collapsed.zips.fips.tracts file
env.benefits.created.na$region.num <- as.numeric(env.benefits.created.na$region)
collapsed.zips.fips.tracts$region.num <- as.numeric(collapsed.zips.fips.tracts$region)
library(purrr)
env.benefits.created.na.test = env.benefits.created.na %>% 
  dplyr::mutate(closestZip = purrr::map_dbl(.x=region.num,~collapsed.zips.fips.tracts$region.num[which.min(abs(.x-collapsed.zips.fips.tracts$region.num))])) %>% 
  dplyr::left_join(collapsed.zips.fips.tracts,by=c('closestZip'='region.num'))
env.benefits.created.na.test$zipdiff <- abs(env.benefits.created.na.test$region.num - env.benefits.created.na.test$closestZip)
env.benefits.created.na.test$region <- if_else(env.benefits.created.na.test$zipdiff<100,env.benefits.created.na.test$region.y,env.benefits.created.na.test$region.x)
env.benefits.created.na.test <- env.benefits.created.na.test[,c(11,2)]
env.benefits.created.na.test <- left_join(env.benefits.created.na.test,collapsed.zips.fips.tracts,by="region")
env.benefits.created.na.test <- env.benefits.created.na.test[,-7]
#append these to the env.benefits.created file
env.benefits.created <- rbind(env.benefits.created,env.benefits.created.na.test)

#sum benefits to the county level
env.benefits.created.county <- aggregate(value ~ CountyFIPS, data=env.benefits.created,FUN = sum)
names(env.benefits.created.county)[1] <- "region"
env.benefits.created.county$region <- as.numeric(env.benefits.created.county$region)

# Get population counts by county
ACSdata$CountyFIPS <- substr(ACSdata$GEOID,start = 1,stop = 5)
pop.county <- aggregate(Number.POP ~ CountyFIPS, data = ACSdata, FUN = sum)

# Merge these population counts with env benefits received
env.benefits.received <- readRDS(file.path(DATA.PROCESSED,"total_damages_fips.rds"))
env.benefits.received.per.cap <- left_join(pop.county,env.benefits.received,by=c("CountyFIPS" = "region"))
env.benefits.received.per.cap$env.ben.PC <- env.benefits.received.per.cap$value/env.benefits.received.per.cap$Number.POP

# Merge population counts with env benefits created
env.benefits.created.county$county <- sprintf("%05d", env.benefits.created.county$region)
env.benefits.created.per.cap <- left_join(pop.county,env.benefits.created.county,by=c("CountyFIPS" = "county"))
env.benefits.created.per.cap$env.ben.created.PC <- env.benefits.created.per.cap$value/env.benefits.created.per.cap$Number.POP
env.benefits.created.per.cap <- subset(env.benefits.created.per.cap, select = c(CountyFIPS,env.ben.created.PC))
env.benefits <- left_join(env.benefits.received.per.cap,env.benefits.created.per.cap,by="CountyFIPS")

# Merge per capita benefits with ACS data
ACSdata.env <- left_join(ACSdata,env.benefits,by="CountyFIPS")
ACSdata.env <- subset(ACSdata.env, select = -c(Number.POP.y,value)) #drop the county population column and drop the county level benefits (value)
#drop AK, HI, and PR from ACS data
ACSdata.env <- subset(ACSdata.env,stateFIPS!="02") #drop AK
ACSdata.env <- subset(ACSdata.env,stateFIPS!="15") #drop HI
ACSdata.env <- subset(ACSdata.env,stateFIPS!="60"&stateFIPS!="66"&stateFIPS!="69"&stateFIPS!="72"&stateFIPS!="78") #drop Guam, NMI, PR, VI

sum(is.na(ACSdata.env$env.ben.PC)) #counts number of NAs in env.ben.PC column
#********Let's drop these for now. Need to fix this later.*************#
whatsmissing <- subset(ACSdata.env,is.na(env.ben.PC)==1)
ACSdata.env <- subset(ACSdata.env,is.na(env.ben.PC)==0)

#Drop if missing Median Household Income (MedianHH)
ACSdata.env$Median.HH <- as.numeric(ACSdata.env$Median.HH)
ACSdata.env <- subset(ACSdata.env,is.na(Median.HH)==0)

#Calculate total benefits in each Census block group
ACSdata.env$env.ben <- ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.x
total.income <- sum(ACSdata.env$Median.HH*ACSdata.env$Number.HH)


# Create Table A.2 (Env Ben per Cap by Race and Income) and do this allowing median income to vary according to race group
ACSdata.env$decile <- with(ACSdata.env, cut(Median.HH, 
                                            breaks=quantile(Median.HH, probs=seq(0,1, by=0.1), na.rm=TRUE), 
                                            include.lowest=TRUE))
ACSdata.env$decile <- as.numeric(ACSdata.env$decile)
ACSdata.env$decile.black <- with(ACSdata.env, cut(Median.HH.black, 
                                                  breaks=quantile(Median.HH, probs=seq(0,1, by=0.1), na.rm=TRUE), 
                                                  include.lowest=TRUE))
ACSdata.env$decile.black <- as.numeric(ACSdata.env$decile.black)
ACSdata.env$decile.whitealone <- with(ACSdata.env, cut(Median.HH.whitealone, 
                                                       breaks=quantile(Median.HH, probs=seq(0,1, by=0.1), na.rm=TRUE), 
                                                       include.lowest=TRUE))
ACSdata.env$decile.whitealone <- as.numeric(ACSdata.env$decile.whitealone)
ACSdata.env$decile.latino <- with(ACSdata.env, cut(Median.HH.latino, 
                                                   breaks=quantile(Median.HH, probs=seq(0,1, by=0.1), na.rm=TRUE), 
                                                   include.lowest=TRUE))
ACSdata.env$decile.latino <- as.numeric(ACSdata.env$decile.latino)
ACSdata.env$decile.asian <- with(ACSdata.env, cut(Median.HH.asian, 
                                                  breaks=quantile(Median.HH, probs=seq(0,1, by=0.1), na.rm=TRUE), 
                                                  include.lowest=TRUE))
ACSdata.env$decile.asian <- as.numeric(ACSdata.env$decile.asian)


tableA2.stats.total.col <- ACSdata.env %>% 
  group_by(decile) %>% 
  summarize(wt.avg = sum(env.ben)/sum(Number.POP.x))
tableA2.stats.black.col <- ACSdata.env %>% 
  group_by(decile.black) %>% 
  summarize(wt.avg.black = sum(env.ben.PC*Number.POP.black)/sum(Number.POP.black)) %>%
  filter(!is.na(decile.black))
tableA2.stats.white.col <- ACSdata.env %>% 
  group_by(decile.whitealone) %>% 
  summarize(wt.avg.whitealone = sum(env.ben.PC*Number.POP.whitealone)/sum(Number.POP.whitealone)) %>%
  filter(!is.na(decile.whitealone))
tableA2.stats.latino.col <- ACSdata.env %>% 
  group_by(decile.latino) %>% 
  summarize(wt.avg.latino = sum(env.ben.PC*Number.POP.latino)/sum(Number.POP.latino)) %>%
  filter(!is.na(decile.latino))
tableA2.stats.asian.col <- ACSdata.env %>% 
  group_by(decile.asian) %>% 
  summarize(wt.avg.asian = sum(env.ben.PC*Number.POP.asian)/sum(Number.POP.asian)) %>%
  filter(!is.na(decile.asian))

#put Table together and print it to file
tableA2.stats.tract <- tableA2.stats.black.col %>%
  rename(decile = decile.black) %>%
  left_join(tableA2.stats.latino.col,by=c("decile"="decile.latino")) %>%
  left_join(tableA2.stats.asian.col,by=c("decile"="decile.asian")) %>%
  left_join(tableA2.stats.white.col,by=c("decile"="decile.whitealone")) %>%
  left_join(tableA2.stats.total.col,by=c("decile"="decile")) %>%
  mutate(across(-decile,~round(as.numeric(.x),digits=3))) %>%
  rename("Income Decile"=decile,Black=wt.avg.black,"Hispanic/Latinx"=wt.avg.latino,Asian=wt.avg.asian,White=wt.avg.whitealone,All=wt.avg)

tableA2.totalrow.black <- round(sum(ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.black)/sum(ACSdata.env$Number.POP.black),digits=3)
tableA2.totalrow.latino <- round(sum(ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.latino)/sum(ACSdata.env$Number.POP.latino),digits=3)
tableA2.totalrow.asian <- round(sum(ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.asian)/sum(ACSdata.env$Number.POP.asian),digits=3)
tableA2.totalrow.whitealone <- round(sum(ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.whitealone)/sum(ACSdata.env$Number.POP.whitealone),digits=3)
tableA2.totalrow.total <- round(sum(ACSdata.env$env.ben.PC*ACSdata.env$Number.POP.x)/sum(ACSdata.env$Number.POP.x),digits=3)

tableA2.stats.tract <- rbind(tableA2.stats.tract,c("Total",tableA2.totalrow.black,tableA2.totalrow.latino,tableA2.totalrow.asian,tableA2.totalrow.whitealone,tableA2.totalrow.total))

byLine <- do.call("c", strsplit(capture.output(stargazer(tableA2.stats.tract, summary=FALSE, rownames=FALSE, float=F)),"\n"))
result <- append(byLine,c(" & \\multicolumn{5}{c}{Demographic Group} \\\\ \\cmidrule{2-6}"), after = c(4))
cat(paste(result, collapse = "\n"),file = file.path(TABLES.OUT,"tab_deciles_races_tracts_noNAs.tex"))

